Librosa, STFT, zero-padding
Reference frequency
Fig. 3.3 Table 3.1 Fig. 3.4 Fig. 3.5
Maybe discussion of "tuning"
Maybe discussion of "compression"
import os
import numpy as np
import scipy
import matplotlib
from matplotlib import pyplot as plt
import librosa
import pandas as pd
import IPython.display as ipd
import music21 as m21
%matplotlib inline
# read wav file
fn_wav = os.path.join('..', 'data', 'C3', 'FMP_C3_F03.wav')
x, Fs = librosa.load(fn_wav)
ipd.Audio(x, rate=Fs)

# compute stft
# N is used as both FFT size and window length (in samples); H is the hop
# size (in samples).
N = 4096 * 4
H = 512
w = scipy.signal.get_window('hann', N)
X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window=w, pad_mode='constant')
# librosa.stft centers its frames by default, so frame m corresponds to sample
# m * H. The n_fft argument of frames_to_time adds an n_fft // 2 offset that
# is only intended for non-centered STFTs, so it is deliberately not passed.
t = librosa.frames_to_time(np.arange(X.shape[1]), sr=Fs, hop_length=H)
freq = librosa.fft_frequencies(sr=Fs, n_fft=N)
# plot stft
# Log-compressed squared magnitude of the STFT, with eps guarding log10(0).
# NOTE(review): 20 * log10 is applied to the *squared* magnitude here; the
# usual dB convention for power is 10 * log10 -- confirm this is intended.
eps = np.finfo(float).eps
Y = 20 * np.log10(eps + np.abs(X) ** 2)

fig = plt.figure(figsize=(15, 5))
# Axis limits of the image: time horizontally, frequency vertically.
left, right = min(t), max(t)
lower, upper = min(freq), max(freq)
plt.imshow(Y, extent=[left, right, lower, upper], cmap='gray_r', aspect='auto', origin='lower')
plt.clim(-30, 30)
plt.ylim(0, 4500)
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency (Hz)')
plt.colorbar()

# Create a Rectangle patch
# Red outline spanning x = 30..31 over (almost) the full visible y-range.
rect = matplotlib.patches.Rectangle(xy=(30, 0.5), width=1, height=4490, facecolor='none', edgecolor='r', linewidth=2)
plt.gca().add_patch(rect)
# pitch table
def F_pitch(p, a4_ref=440):
    """Return the center frequency of pitch ``p``.

    Pitch 69 maps to ``a4_ref``; every step of 12 in ``p`` doubles (or halves)
    the frequency.

    Args:
        p: Pitch number (may be fractional, e.g. ``p - 0.5`` for a band edge).
        a4_ref: Frequency assigned to pitch 69.

    Returns:
        The frequency ``2 ** ((p - 69) / 12) * a4_ref``.
    """
    steps_from_reference = p - 69
    ratio = 2 ** (steps_from_reference / 12)
    return ratio * a4_ref
# Collect, for pitches 60 through 72, the note name, the center frequency,
# the lower/upper band edges (half a pitch step below/above) and the
# resulting bandwidth.
note_infos = []
for p in range(60, 73):
    name = m21.note.Note(p).pitch.unicodeNameWithOctave
    p_pitch = F_pitch(p)
    p_pitch_lower = F_pitch(p - 0.5)
    p_pitch_upper = F_pitch(p + 0.5)
    bw = p_pitch_upper - p_pitch_lower
    note_infos.append([name, p, p_pitch, p_pitch_lower, p_pitch_upper, bw])

# Raw strings keep the LaTeX backslashes literal; in a normal string '\m' is
# an invalid escape sequence and triggers a warning on recent Python versions.
df = pd.DataFrame(
    note_infos,
    columns=[
        'Note',
        '$p$',
        r'$F_\mathrm{pitch}(p)$',
        r'$F_\mathrm{pitch}(p-0.5)$',
        r'$F_\mathrm{pitch}(p+0.5)$',
        '$BW(p)$',
    ],
)

# Render the table as HTML with two decimals and a reduced width.
html = df.to_html(index=False, float_format='%.2f')
html = html.replace('<table', '<table style="width: 66%"')
ipd.HTML(html)
# definition of Y_lf
def F_coef(k, Fs, N):
    """Return the frequency associated with Fourier coefficient index ``k``.

    Args:
        k: Coefficient index (scalar or array).
        Fs: Sampling rate.
        N: FFT size.

    Returns:
        ``k * Fs / N``, with the same shape as ``k``.
    """
    scaled_index = k * Fs
    return scaled_index / N
def P(p, Fs, N, a4_ref=440):
    """Return the Fourier coefficient indices that belong to pitch ``p``.

    An index ``k`` is assigned to ``p`` when its frequency lies in the
    half-open band ``F_pitch(p - 0.5) <= F_coef(k) < F_pitch(p + 0.5)``.

    Args:
        p: Pitch number (``F_pitch`` maps 69 to ``a4_ref``).
        Fs: Sampling rate.
        N: FFT size.
        a4_ref: Reference frequency for pitch 69; forwarded to ``F_pitch`` so
            that a non-default tuning actually moves the band edges.

    Returns:
        1-D integer array of matching indices (empty if no bin falls into
        the band).
    """
    lower = F_pitch(p - 0.5, a4_ref=a4_ref)
    upper = F_pitch(p + 0.5, a4_ref=a4_ref)
    # A real-input FFT of size N yields N // 2 + 1 coefficients (index 0 up
    # to and including the Nyquist index N // 2), matching the STFT row count.
    k = np.arange(N // 2 + 1)
    k_freq = F_coef(k, Fs, N)
    mask = np.logical_and(lower <= k_freq, k_freq < upper)
    return k[mask]
def get_Y_lf(Y, Fs, N, a4_ref=440):
    """Pool spectrogram rows into 128 pitch bands.

    For every pitch ``p`` in ``0..127`` the rows of ``Y`` selected by
    ``P(p, Fs, N, a4_ref)`` are summed per column.

    Args:
        Y: 2-D array indexed as ``Y[coefficient, frame]``.
        Fs: Sampling rate, passed on to ``P``.
        N: FFT size, passed on to ``P``.
        a4_ref: Reference frequency, passed on to ``P``.

    Returns:
        Array of shape ``(128, Y.shape[1])`` with the pooled values.
    """
    num_frames = Y.shape[1]
    Y_lf = np.zeros((128, num_frames))
    # Each row of Y_lf is a view, so writing into it fills Y_lf in place.
    for pitch, pooled_row in enumerate(Y_lf):
        bins = P(pitch, Fs, N, a4_ref)
        pooled_row[:] = Y[bins, :].sum(axis=0)
    return Y_lf
# computation of Y_lf
# Squared STFT magnitude, pooled into pitch bands.
Y = np.abs(X) ** 2
Y_lf = get_Y_lf(Y, Fs, N)

# plotting of Y_lf
fig = plt.figure(figsize=(15, 5))
# Time on the horizontal axis; the vertical axis covers the 128 pitch rows.
left, right = min(t), max(t)
lower, upper = 0, 128
plt.imshow(20 * np.log10(eps + Y_lf), extent=[left, right, lower, upper], cmap='gray_r', aspect='auto', origin='lower')
plt.clim(-30, 30)
plt.ylim(21, 108)
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency (pitch)')
plt.colorbar()

# Create a Rectangle patch
# Red outline spanning x = 30..31 over the pitch axis.
rect = matplotlib.patches.Rectangle(xy=(30, 0.5), width=1, height=127, facecolor='none', edgecolor='r', linewidth=2)
plt.gca().add_patch(rect)
# definition of C
def get_C(Y_lf):
    """Fold a 128-row pitch representation into 12 chroma rows.

    Row ``c`` of the result is the per-column sum of all rows ``p`` of
    ``Y_lf`` with ``p % 12 == c``.

    Args:
        Y_lf: 2-D array with 128 rows (one per pitch).

    Returns:
        Array of shape ``(12, Y_lf.shape[1])``.
    """
    num_frames = Y_lf.shape[1]
    chroma_of_pitch = np.arange(128) % 12
    C = np.zeros((12, num_frames))
    # Each row of C is a view, so writing into it fills C in place.
    for chroma, chroma_row in enumerate(C):
        chroma_row[:] = Y_lf[chroma_of_pitch == chroma, :].sum(axis=0)
    return C
# computation of C
# Fold the pitch-band representation into 12 chroma rows.
C = get_C(Y_lf)

# plotting of C
fig = plt.figure(figsize=(15, 5))
# Time on the horizontal axis; one unit-height row per chroma vertically.
left, right = min(t), max(t)
lower, upper = 0, 12
plt.imshow(20 * np.log10(eps + C), extent=[left, right, lower, upper], cmap='gray_r', aspect='auto', origin='lower')
plt.clim(0, 100)
plt.xlabel('Time (seconds)')
plt.ylabel('Chroma')
plt.colorbar()
# Place one tick at the vertical center of each chroma row, labelled by name.
chroma_names = [m21.note.Note(c).pitch.unicodeName for c in range(12)]
plt.yticks(np.arange(12) + 0.5, chroma_names)

# Create a Rectangle patch
# Red outline spanning x = 30..31 over all 12 chroma rows.
rect = matplotlib.patches.Rectangle(xy=(30, 0.0), width=1, height=12, facecolor='none', edgecolor='r', linewidth=2)
plt.gca().add_patch(rect)
# read wav file
fn_wav = os.path.join('..', 'data', 'C3', 'FMP_C3_F05.wav')
x, Fs = librosa.load(fn_wav)
ipd.Audio(x, rate=Fs)

# compute stft
# N is used as both FFT size and window length (in samples); H is the hop
# size (in samples).
N = 4096
H = 512
w = scipy.signal.get_window('hann', N)
X = librosa.stft(x, n_fft=N, hop_length=H, win_length=N, window=w, pad_mode='constant')
# librosa.stft centers its frames by default, so frame m corresponds to sample
# m * H. The n_fft argument of frames_to_time adds an n_fft // 2 offset that
# is only intended for non-centered STFTs, so it is deliberately not passed.
t = librosa.frames_to_time(np.arange(X.shape[1]), sr=Fs, hop_length=H)
freq = librosa.fft_frequencies(sr=Fs, n_fft=N)

# computation of Y_lf
# Squared STFT magnitude, pooled into pitch bands.
Y = np.abs(X) ** 2
Y_lf = get_Y_lf(Y, Fs, N)

# computation of C
# Fold the pitch-band representation into 12 chroma rows.
C = get_C(Y_lf)
# Two stacked panels: pitch-band representation on top, chroma below.
fig = plt.figure(figsize=(15, 10))

# plotting of Y_lf (upper panel)
plt.subplot(2, 1, 1)
left, right = min(t), max(t)
lower, upper = 0, 128
plt.imshow(20 * np.log10(eps + Y_lf), extent=[left, right, lower, upper], cmap='gray_r', aspect='auto', origin='lower')
plt.clim(20, 50)
plt.ylim(55, 92)
plt.xlabel('Time (seconds)')
plt.ylabel('Frequency (pitch)')
plt.colorbar()

# Create a Rectangle patch
# Red outline spanning x = 30..31 over the pitch axis.
rect = matplotlib.patches.Rectangle(xy=(30, 0.5), width=1, height=127, facecolor='none', edgecolor='r', linewidth=2)
plt.gca().add_patch(rect)

# plotting of C (lower panel)
plt.subplot(2, 1, 2)
left, right = min(t), max(t)
lower, upper = 0, 12
plt.imshow(20 * np.log10(eps + C), extent=[left, right, lower, upper], cmap='gray_r', aspect='auto', origin='lower')
plt.clim(20, 80)
plt.xlabel('Time (seconds)')
plt.ylabel('Chroma')
plt.colorbar()
# Place one tick at the vertical center of each chroma row, labelled by name.
chroma_names = [m21.note.Note(c).pitch.unicodeName for c in range(12)]
plt.yticks(np.arange(12) + 0.5, chroma_names)

# Create a Rectangle patch
# Red outline spanning x = 30..31 over all 12 chroma rows.
rect = matplotlib.patches.Rectangle(xy=(30, 0.0), width=1, height=12, facecolor='none', edgecolor='r', linewidth=2)
plt.gca().add_patch(rect)

plt.tight_layout()
Acknowledgment: This notebook was created by Frank Zalkow and Meinard Müller.